# Read the data file and run analyses.

setwd("F:/Experiment Data/Complex Contagion/ComplexContagionDatasets/")
exp1 = read.csv("Exp1_innocuous_questions.csv")

# Sanity checks.
head(exp1)
dim(exp1)
table(exp1$finished)
table(exp1$conditionName)

# Code dependent variables.
# Code Self, Schis and Other columns.
exp1$Self = 0
exp1[exp1$userChoice == "Keep_10_cents_and_Do_Not_Donate", c("Self")] = 1
exp1$Schis = 0
exp1[exp1$userChoice == "Schistosomiasis_Control_Initiative", c("Schis")] = 1
exp1$Other = 0
exp1[exp1$Self == 0 & exp1$Schis == 0, c("Other")] = 1

# Code AnyCharity column.
exp1$AnyCharity = 0
exp1[exp1$userChoice != "Keep_10_cents_and_Do_Not_Donate", c("AnyCharity")] = 1

# Code independent variables.
# Code Tie Strength column.
exp1$TieStrength = "High"
exp1[grepl("^AllLo__", exp1$conditionName), c("TieStrength")] = "Low"
#exp1[, c("conditionName", "TieStrength")]

# Code SelfNeighbors, SchisNeighbors and OtherNeighbors columns.
exp1$SelfNeighbors = 0
exp1[grepl("Self_1", exp1$conditionName), c("SelfNeighbors")] = 1
exp1[grepl("Self_2", exp1$conditionName), c("SelfNeighbors")] = 2
exp1[grepl("Self_3", exp1$conditionName), c("SelfNeighbors")] = 3

exp1$SchisNeighbors = 0
exp1[grepl("Schis_1", exp1$conditionName), c("SchisNeighbors")] = 1
exp1[grepl("Schis_2", exp1$conditionName), c("SchisNeighbors")] = 2
exp1[grepl("Schis_3", exp1$conditionName), c("SchisNeighbors")] = 3

exp1$OtherNeighbors = 0
exp1[grepl("Rand_1", exp1$conditionName), c("OtherNeighbors")] = 1
exp1[grepl("Rand_2", exp1$conditionName), c("OtherNeighbors")] = 2
exp1[grepl("Rand_3", exp1$conditionName), c("OtherNeighbors")] = 3

exp1[, c("conditionName", "TieStrength", "SelfNeighbors", "SchisNeighbors", "OtherNeighbors")]


# Influence curves.  First split by tie strength, then together.
# UserChoice is Schis by Schis neighbor count.  Split by Tie Strength.
aggregate(Schis ~ TieStrength + SchisNeighbors, exp1, mean)
aggregate(Schis ~ SchisNeighbors, exp1, mean)

# Means will give us the proportions, but let's also get the raw numerator and denominator.
exp1$countMe = 1
aggregate(Schis ~ TieStrength + SchisNeighbors, exp1, sum)
aggregate(countMe ~ TieStrength + SchisNeighbors, exp1, sum)
table(exp1$SchisNeighbors)


# Test linear versus exponential fit for the Schis outcome as function of neighbor count.
schisDataToFit = as.data.frame(aggregate(Schis ~ SchisNeighbors, exp1, mean))
linearModel = lm(Schis ~ SchisNeighbors, data=schisDataToFit)
summary(linearModel)
expModel = lm(log(Schis) ~ SchisNeighbors, data=schisDataToFit)
summary(expModel)

# Compare models on plot.
png("exp1_model_comparison.png")
plot(schisDataToFit$SchisNeighbors, schisDataToFit$Schis, pch=16, xlab = "SchisNeighbors", ylab = "Percent Choosing Schis")
xValues = seq(0, 3, 0.1)
expModelPredictions = exp(predict(expModel, list(SchisNeighbors=xValues)))
lines(xValues, expModelPredictions, lwd=2, col = "red")
linearModelPredictions = predict(linearModel, list(SchisNeighbors=xValues))
lines(xValues, linearModelPredictions, lwd=2, col = "blue", lty="longdash")
legend("topleft", inset=0.05, legend=c("Linear Fit", "Exponential Fit"), col=c("blue", "red"), lwd=c(2,2), lty=c("longdash", "solid"))
dev.off()

# Compare nested models using ANOVA.
linearModel = lm(Schis ~ SchisNeighbors, data=schisDataToFit)
linearModelWithZeroIndicator = lm(Schis ~ SchisNeighbors + I(SchisNeighbors==0), data=schisDataToFit)
summary(linearModelWithZeroIndicator)
anova(linearModel, linearModelWithZeroIndicator, test="F")

linearModel = lm(Schis ~ SchisNeighbors, data=schisDataToFit)
linearModelWithExpAlterCounts = lm(Schis ~ SchisNeighbors + I(exp(SchisNeighbors)), data=schisDataToFit)
summary(linearModelWithExpAlterCounts)
anova(linearModel, linearModelWithExpAlterCounts, test="F")


# Create influence curve per tie strength plot.
infCurveByTieStrengthNumbers = aggregate(Schis ~ TieStrength + SchisNeighbors, exp1, mean)
highPoints = infCurveByTieStrengthNumbers[infCurveByTieStrengthNumbers$TieStrength == "High", c("SchisNeighbors", "Schis")]
lowPoints = infCurveByTieStrengthNumbers[infCurveByTieStrengthNumbers$TieStrength == "Low", c("SchisNeighbors", "Schis")]
png("exp1_inf_line_by_tie_strength.png")
plot(highPoints$SchisNeighbors, highPoints$Schis, pch=16, col="#17202a", cex=1.25, xlab = "SCI Neighbors", ylab = "Proportion Choosing SCI", ylim=c(0, 0.15))
points(lowPoints$SchisNeighbors, lowPoints$Schis, pch=17, col="#99a3a4", cex=1.25)
# Plot a linear fit to the High points.
highFit = lm(Schis ~ SchisNeighbors, data=highPoints)
xValues = seq(0, 3, 0.1)
highFitPredictions = predict(highFit, list(SchisNeighbors=xValues))
lines(xValues, highFitPredictions, lwd=2, lty="solid", col="#17202a")
# Plot a linear fit to the Low points.
lowFit = lm(Schis ~ SchisNeighbors, data=lowPoints)
xValues = seq(0, 3, 0.1)
lowFitPredictions = predict(lowFit, list(SchisNeighbors=xValues))
lines(xValues, lowFitPredictions, lwd=2, lty="dashed", col="#99a3a4")
legend("topleft", inset=0.05, legend=c("High Tie Strength", "Low Tie Strength"), pch=c(16, 17), col=c("#17202a", "#99a3a4"), lwd=c(2,2), lty=c("solid", "dashed"))
dev.off()

